library(readr)
## Warning: package 'readr' was built under R version 3.6.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.1
library(DT)
## Warning: package 'DT' was built under R version 3.6.1
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.6.1
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.1
library(janitor)
## Warning: package 'janitor' was built under R version 3.6.1
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(MASS)
## Warning: package 'MASS' was built under R version 3.6.1
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(scales)
## Warning: package 'scales' was built under R version 3.6.1
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.6.1
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.1
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.2
## v tidyr 1.0.0 v stringr 1.4.0
## v tibble 2.1.3 v forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.6.1
## Warning: package 'tidyr' was built under R version 3.6.1
## Warning: package 'purrr' was built under R version 3.6.1
## Warning: package 'stringr' was built under R version 3.6.1
## Warning: package 'forcats' was built under R version 3.6.1
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x scales::col_factor() masks readr::col_factor()
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x MASS::select() masks dplyr::select()
# NOTE(review): setwd() ties the script to one machine; it is kept here only
# because other relative paths may depend on the working directory.
setwd("C:/Users/Jeffo/Downloads")
# Read the crime export from dataMontgomery. Empty strings and "NA" are both
# treated as missing. fileEncoding = "UTF-8-BOM" strips the byte-order mark
# that otherwise ends up glued to the first column name ("ï..Incident.ID").
crime <- read.csv(
  "crime.csv",
  header = TRUE,
  na.strings = c("", "NA"),
  fileEncoding = "UTF-8-BOM"
)
Using the glimpse function, we can see a preview of the dataset.
# Preview every column: its name, its type, and the first few values.
glimpse(crime)
## Observations: 177,029
## Variables: 30
## $ ï..Incident.ID <int> 201257896, 201257879, 201257875, 201258...
## $ Offence.Code <fct> 1217, 1313, 9107, 9105, 1399, 2303, 910...
## $ CR.Number <int> 190045675, 190045674, 190045669, 190045...
## $ Dispatch.Date...Time <fct> 09/24/2019 04:48:39 AM, 09/24/2019 04:3...
## $ NIBRS.Code <fct> 120, 13B, 90Z, 90Z, 13B, 23C, 90Z, 35A,...
## $ Victims <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ Crime.Name1 <fct> Crime Against Property, Crime Against P...
## $ Crime.Name2 <fct> Robbery, Simple Assault, All Other Offe...
## $ Crime.Name3 <fct> ROBBERY - DOMESTIC, ASSAULT - SIMPLE, M...
## $ Police.District.Name <fct> MONTGOMERY VILLAGE, WHEATON, MONTGOMERY...
## $ Block.Address <fct> 400 BLK N FREDERICK AVE, 17300 BLK MON...
## $ City <fct> GAITHERSBURG, OLNEY, DERWOOD, ROCKVILLE...
## $ State <fct> MD, MD, MD, MD, MD, MD, MD, MD, MD, MD,...
## $ Zip.Code <int> 20877, 20832, 20855, 20850, 20850, 2085...
## $ Agency <fct> GPD, MCPD, MCPD, MCPD, RCPD, RCPD, MCPD...
## $ Place <fct> Residence - Apartment/Condo, Residence ...
## $ Sector <fct> R, J, R, B, A, A, R, P, P, K, R, R, H, ...
## $ Beat <fct> 6R1, 4J2, 6R3, 1B1, 1A3, 1A3, 6R2, 6P1,...
## $ PRA <fct> 548, 399, 700, 237, 278, 278, 546, 549,...
## $ Address.Number <int> 400, 17300, 17700, 10000, 600, 600, 100...
## $ Street.Prefix <fct> N, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Street.Name <fct> FREDERICK, MONITOR, LISA, DARNESTOWN, H...
## $ Street.Suffix <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Street.Type <fct> AVE, DR, DR, RD, DR, DR, RD, AVE, AVE, ...
## $ Start_Date_Time <fct> 09/24/2019 04:48:00 AM, 09/24/2019 04:3...
## $ End_Date_Time <fct> NA, NA, NA, 09/23/2019 07:30:00 PM, 09/...
## $ Latitude <dbl> 39.14573, 39.14137, 39.14631, 39.09690,...
## $ Longitude <dbl> -77.20378, -77.07187, -77.13965, -77.20...
## $ Police.District.Number <fct> 6D, 4D, 6D, 1D, 1D, 1D, 6D, 6D, 6D, 4D,...
## $ Location <fct> "(39.1457, -77.2038)", "(39.1414, -77.0...
The first part of cleaning the Crime dataset is to convert the all-caps text values (crime descriptions, district names, cities and street fields) into title case so that they are easier to read. Since R programming is predominantly done in lower case, I also convert the variable names to lower case to make my coding more consistent.
# Title-case upper-case words: for every run of two or more capital letters
# that starts at a word boundary, keep the first letter and lower-case the
# rest. Single capitals and tokens that begin with a digit (e.g. "2ND") are
# left untouched. Returns a character vector; factor input is coerced by
# gsub().
proper_case <- function(x) {
  caps_word_pattern <- "\\b([A-Z])([A-Z]+)"
  title_case_template <- "\\U\\1\\L\\2"
  gsub(caps_word_pattern, title_case_template, x, perl = TRUE)
}
# Title-case the free-text columns. mutate() overwrites each column in place,
# so column positions are unchanged.
crime <- crime %>%
  mutate(
    Block.Address = proper_case(Block.Address),
    Street.Type = proper_case(Street.Type),
    Street.Name = proper_case(Street.Name),
    City = proper_case(City),
    Police.District.Name = proper_case(Police.District.Name),
    Crime.Name3 = proper_case(Crime.Name3)
  )
# Normalise the column names with clean_names(), then pin three of them by
# position (columns 1, 8 and 9).
crime <- clean_names(crime)
names(crime)[c(1, 8, 9)] <- c("incident_id", "crime_name2", "crime_name3")
# Preview the columns again to check their names and types.
glimpse(crime)
## Observations: 177,029
## Variables: 30
## $ incident_id <int> 201257896, 201257879, 201257875, 201258...
## $ offence_code <fct> 1217, 1313, 9107, 9105, 1399, 2303, 910...
## $ cr_number <int> 190045675, 190045674, 190045669, 190045...
## $ dispatch_date_time <fct> 09/24/2019 04:48:39 AM, 09/24/2019 04:3...
## $ nibrs_code <fct> 120, 13B, 90Z, 90Z, 13B, 23C, 90Z, 35A,...
## $ victims <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ crime_name1 <fct> Crime Against Property, Crime Against P...
## $ crime_name2 <fct> Robbery, Simple Assault, All Other Offe...
## $ crime_name3 <chr> "Robbery - Domestic", "Assault - Simple...
## $ police_district_name <chr> "Montgomery Village", "Wheaton", "Montg...
## $ block_address <chr> "400 Blk N Frederick Ave", "17300 Blk ...
## $ city <chr> "Gaithersburg", "Olney", "Derwood", "Ro...
## $ state <fct> MD, MD, MD, MD, MD, MD, MD, MD, MD, MD,...
## $ zip_code <int> 20877, 20832, 20855, 20850, 20850, 2085...
## $ agency <fct> GPD, MCPD, MCPD, MCPD, RCPD, RCPD, MCPD...
## $ place <fct> Residence - Apartment/Condo, Residence ...
## $ sector <fct> R, J, R, B, A, A, R, P, P, K, R, R, H, ...
## $ beat <fct> 6R1, 4J2, 6R3, 1B1, 1A3, 1A3, 6R2, 6P1,...
## $ pra <fct> 548, 399, 700, 237, 278, 278, 546, 549,...
## $ address_number <int> 400, 17300, 17700, 10000, 600, 600, 100...
## $ street_prefix <fct> N, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ street_name <chr> "Frederick", "Monitor", "Lisa", "Darnes...
## $ street_suffix <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ street_type <chr> "Ave", "Dr", "Dr", "Rd", "Dr", "Dr", "R...
## $ start_date_time <fct> 09/24/2019 04:48:00 AM, 09/24/2019 04:3...
## $ end_date_time <fct> NA, NA, NA, 09/23/2019 07:30:00 PM, 09/...
## $ latitude <dbl> 39.14573, 39.14137, 39.14631, 39.09690,...
## $ longitude <dbl> -77.20378, -77.07187, -77.13965, -77.20...
## $ police_district_number <fct> 6D, 4D, 6D, 1D, 1D, 1D, 6D, 6D, 6D, 4D,...
## $ location <fct> "(39.1457, -77.2038)", "(39.1414, -77.0...
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.1
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Parse the dispatch timestamp, split it into components, and add logical
# flags for the subsets analysed further down.
crime <- crime %>%
  mutate(
    incident_date = mdy_hms(dispatch_date_time),
    incident_hour = hour(incident_date),
    incident_minute = minute(incident_date),
    incident_month = month(incident_date),
    incident_second = second(incident_date),
    incident_year = year(incident_date)
  ) %>%
  mutate(
    # Night means the hour is outside 6..18, i.e. 00:00-05:59 or 19:00-23:59.
    atnight = !(incident_hour >= 6 & incident_hour <= 18),
    # Matches the "Sell" sub-type only, not every marijuana offence.
    marijuana = crime_name3 == "Drugs - Marijuana - Sell",
    burglary = crime_name2 == "Burglary/Breaking and Entering",
    drugs = crime_name2 == "Drug/Narcotic Violations"
  )
Using the filter function, I am able to create separate datasets that I can use when I want to analyze only part of the data, such as marijuana or burglary incidents.
# Subsets driven by the logical flags. filter() keeps rows where the
# condition is TRUE, so rows with an NA flag are dropped as well.
crime_marijuana <- crime %>% filter(marijuana)
crime_burglary <- crime %>% filter(burglary)
crime_night <- crime %>% filter(atnight)
crime_day <- crime %>% filter(!atnight)
crime_burglarynight <- crime_night %>% filter(burglary)
# Night-time drug/narcotic incidents. The explicit `drop = TRUE` was removed:
# with it, a result of exactly one row is collapsed to a plain list instead of
# staying a data frame.
drugsatnight <- subset(crime_night, crime_name2 %in% "Drug/Narcotic Violations")
# Remove the last hyphen and everything after it, e.g.
# "Drugs - Marijuana - Sell" becomes "Drugs - Marijuana " (the trailing space
# is kept). A description with a single hyphen keeps only the text before it.
drugsatnight$crime_name3 <- sub("-[^-]+$", "", drugsatnight$crime_name3)
# Night-time rows whose description mentions "Marijuana": first within the
# drug/narcotic subset, then across all night-time incidents.
marijuanaatnight <- filter(
  drugsatnight,
  str_detect(crime_name3, "Marijuana")
)
generalmarijuanaatnight <- filter(
  crime_night,
  str_detect(crime_name3, "Marijuana")
)
The data frame now contains 40 variables and 177029 rows. These variables include the city of the crime, the time of the crime, as well as a description of the crime. Most of the original variables are factors; the four flag variables I created (atnight, marijuana, burglary and drugs) are logical variables.
# Show the structure of the data frame: each column's type and first values.
str(crime)
## 'data.frame': 177029 obs. of 40 variables:
## $ incident_id : int 201257896 201257879 201257875 201258527 201257865 201257865 201257876 201257985 201257985 201257872 ...
## $ offence_code : Factor w/ 319 levels "0301","0302",..: 49 63 309 307 68 95 304 159 95 309 ...
## $ cr_number : int 190045675 190045674 190045669 190045664 190045660 190045660 190045661 190045658 190045658 190045659 ...
## $ dispatch_date_time : Factor w/ 136430 levels "01/01/2017 01:02:30 AM",..: 100813 100812 100811 100814 100437 100437 100436 100435 100435 100431 ...
## $ nibrs_code : Factor w/ 54 levels "09A","09B","09C",..: 9 11 54 54 11 18 54 35 18 54 ...
## $ victims : int 1 1 1 1 1 1 1 1 1 1 ...
## $ crime_name1 : Factor w/ 5 levels "Crime Against Person",..: 2 1 5 5 1 2 5 3 2 5 ...
## $ crime_name2 : Factor w/ 55 levels "Aggravated Assault",..: 44 48 3 3 48 47 3 15 47 3 ...
## $ crime_name3 : chr "Robbery - Domestic" "Assault - Simple" "Missing Person" "Lost Property" ...
## $ police_district_name : chr "Montgomery Village" "Wheaton" "Montgomery Village" "Rockville" ...
## $ block_address : chr "400 Blk N Frederick Ave" "17300 Blk Monitor Dr" "17700 Blk Lisa Dr" "10000 Blk Darnestown Rd" ...
## $ city : chr "Gaithersburg" "Olney" "Derwood" "Rockville" ...
## $ state : Factor w/ 8 levels "0","15","16",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ zip_code : int 20877 20832 20855 20850 20850 20850 20886 20878 20878 20853 ...
## $ agency : Factor w/ 7 levels "GPD","MCFM","MCPD",..: 1 3 3 3 6 6 3 1 1 3 ...
## $ place : Factor w/ 83 levels "Air/Bus/Train/Metro Terminal",..: 49 58 55 15 18 18 49 65 65 58 ...
## $ sector : Factor w/ 15 levels "A","B","D","E",..: 14 8 14 2 1 1 14 13 13 9 ...
## $ beat : Factor w/ 55 levels "-PG","1A1","1A2",..: 50 30 52 6 4 4 51 45 45 34 ...
## $ pra : Factor w/ 796 levels "000","001","002",..: 558 406 710 240 281 281 556 559 559 348 ...
## $ address_number : int 400 17300 17700 10000 600 600 10000 1 1 14200 ...
## $ street_prefix : Factor w/ 4 levels "E","N","S","W": 2 NA NA NA NA NA NA NA NA NA ...
## $ street_name : chr "Frederick" "Monitor" "Lisa" "Darnestown" ...
## $ street_suffix : Factor w/ 6 levels "E","N","NE","NW",..: NA NA NA NA NA NA NA NA NA NA ...
## $ street_type : chr "Ave" "Dr" "Dr" "Rd" ...
## $ start_date_time : Factor w/ 128547 levels "01/01/2017 01:00:00 AM",..: 95422 95418 95391 95021 95054 95054 95053 95047 95047 94625 ...
## $ end_date_time : Factor w/ 60657 levels "01/01/2017 01:03:00 PM",..: NA NA NA 44428 44441 44441 NA NA NA NA ...
## $ latitude : num 39.1 39.1 39.1 39.1 39.1 ...
## $ longitude : num -77.2 -77.1 -77.1 -77.2 -77.2 ...
## $ police_district_number: Factor w/ 9 levels "1D","2D","3D",..: 6 4 6 1 1 1 6 6 6 4 ...
## $ location : Factor w/ 19610 levels "(0.0, 0.0)","(38.9384, -77.1184)",..: 14741 14414 14789 11378 10898 10898 16599 13127 13127 10675 ...
## $ incident_date : POSIXct, format: "2019-09-24 04:48:39" "2019-09-24 04:31:56" ...
## $ incident_hour : int 4 4 1 0 23 23 23 22 22 22 ...
## $ incident_minute : int 48 31 10 59 20 20 14 50 50 38 ...
## $ incident_month : num 9 9 9 9 9 9 9 9 9 9 ...
## $ incident_second : num 39 56 14 51 24 24 56 16 16 49 ...
## $ incident_year : num 2019 2019 2019 2019 2019 ...
## $ atnight : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ marijuana : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ burglary : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ drugs : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
In this dataset, the city column has 45 unique values. These include NA and the misspelling "Rockvillle", so 43 distinct cities are actually represented in the dataset.
# List the distinct values of the city column (NA counts as a value here).
unique(crime$city)
## [1] "Gaithersburg" "Olney" "Derwood"
## [4] "Rockville" "Montgomery Village" "Silver Spring"
## [7] "Germantown" "Bethesda" "Damascus"
## [10] "Spencerville" "Takoma Park" "Potomac"
## [13] "Boyds" "Kensington" "Chevy Chase"
## [16] "Clarksburg" "Burtonsville" NA
## [19] "Brookeville" "Dickerson" "Poolesville"
## [22] "Sandy Spring" "Mount Airy" "Ashton"
## [25] "Cabin John" "Brinklow" "Glen Echo"
## [28] "Washington Grove" "Laurel" "Beallsville"
## [31] "Hyattsville" "Beltsville" "Woodbine"
## [34] "Barnesville" "Washington" "Greenbelt"
## [37] "Highland" "Rockvillle" "Adelphi"
## [40] "Bowie" "Lanham" "Hagerstown"
## [43] "Frederick" "College Park" "Brentwood"
As you can see below, a large number of rows are missing the dispatch time. There is no sensible average value with which to replace a missing timestamp, so we will simply omit those rows.
# Count the rows whose dispatch timestamp is missing.
sum(is.na(crime$dispatch_date_time))
## [1] 27497
The crime I will mainly be focusing on in this project is marijuana, which is considered a crime against society. There are 49729 occurrences of crimes against society. Since this is too broad, I need a more detailed type.
# Tally the top-level crime category, most frequent first. sort() on a table
# reorders it with order(), so the result matches the original indexing form
# without building the table twice.
sort(table(crime$crime_name1), decreasing = TRUE)
##
## Crime Against Property Crime Against Society Other
## 74226 49729 34743
## Crime Against Person Not a Crime
## 16133 2154
This tally is slightly more specific than the first crime type. Marijuana falls under Drug/Narcotic Violations, which has 18216 occurrences. This still includes other drugs, so I need to narrow it down even more.
# Tally the second-level crime category, most frequent first. sort() on a
# table reorders it with order(), so the result matches the original indexing
# form without building the table twice.
sort(table(crime$crime_name2), decreasing = TRUE)
##
## All Other Offenses
## 40386
## Drug/Narcotic Violations
## 18216
## Theft From Motor Vehicle
## 14437
## Simple Assault
## 11558
## Destruction/Damage/Vandalism of Property
## 10461
## Driving Under the Influence
## 10083
## Shoplifting
## 8969
## All other Larceny
## 7752
## Theft from Building
## 6758
## Liquor Law Violations
## 5562
## Burglary/Breaking and Entering
## 5041
## Disorderly Conduct
## 3806
## False Pretenses/Swindle/Confidence Game
## 3059
## Identity Theft
## 2977
## Motor Vehicle Theft
## 2933
## Trespass of Real Property
## 2699
## Theft of Motor Vehicle Parts or Accessories
## 2643
## Credit Card/Automatic Teller Machine Fraud
## 2219
## Runaway
## 2154
## Aggravated Assault
## 2150
## Robbery
## 2129
## Counterfeiting/Forgery
## 1515
## Impersonation
## 1467
## Drug Equipment Violations
## 1457
## Weapon Law Violations
## 1228
## Forcible Rape
## 679
## Fondling
## 577
## Family Offenses, NonViolent
## 452
## Pocket/picking
## 388
## Intimidation
## 299
## Embezzlement
## 263
## Bad Checks
## 259
## Forcible Fondling
## 256
## Forcible Sodomy
## 253
## Purse-snatching
## 233
## Arson
## 183
## Not Mapped
## 178
## Sexual Assault With An Object
## 158
## Stolen Property Offenses
## 155
## NOT NIBRS CODE
## 154
## Extortion/Blackmail
## 149
## Wire Fraud
## 133
## Prostitution
## 126
## Pornography/Obscene Material
## 105
## Drunkenness
## 72
## Peeping Tom
## 58
## Murder and Nonnegligent Manslaughter
## 50
## Kidnapping/Abduction
## 40
## From Coin/Operated Machine or Device
## 30
## Human Trafficking, Commercial Sex Acts
## 29
## Curfew/Loitering/Vagrancy Violations
## 28
## Assisting or Promoting Prostitution
## 14
## Justifiable Homicide
## 3
## Negligent Manslaughter
## 1
## Operating/Promoting/Assisting Gambling
## 1
For this final tally, I am focusing on only 5 of the crime types in crime type 3, all of which involve marijuana. Adding up the 5 types, I get 12648+732+332+24+15=13751 occurrences. We can see that possession of marijuana is the 2nd most common crime type in Montgomery County.
# Tally the most detailed crime description, most frequent first. sort() on a
# table reorders it with order(), so the result matches the original indexing
# form without building the table twice.
sort(table(crime$crime_name3), decreasing = TRUE)
##
## Larceny - From Auto
## 14437
## Drugs - Marijuana - Possess
## 12648
## Driving Under The Influence Liquor
## 9662
## Police Information
## 9593
## Assault - 2ND Degree
## 9382
## Larceny - Shoplifting
## 8969
## Larceny (Describe Offense)
## 7152
## Damage Property - Private
## 6547
## Larceny - From Bldg
## 6286
## Lost Property
## 6176
## Mental Illness - Emergency Petition
## 5816
## Liquor - Possess
## 3952
## Public Peace - Disorderly Conduct
## 3662
## Identity Theft
## 2977
## Sudden Death
## 2949
## Mental Illness
## 2888
## Auto Theft - Vehicle Theft
## 2773
## Missing Person
## 2729
## Trespassing
## 2699
## Larceny - Auto Parts
## 2642
## Damage Property (Describe Offense)
## 2509
## Fraud - Illegal Use Credit Cards
## 2219
## Assault - Simple
## 2176
## Juvenile - Runaway
## 2154
## Burglary - Forced Entry-Residential
## 2108
## Fraud (Describe Offense)
## 1624
## Fraud - Impersonation
## 1467
## Drugs - Narcotic Equip - Possess
## 1457
## Recovered Property - Mont Co.
## 1430
## Burglary - No Forced Entry-Residential
## 1421
## Drugs - Cocaine - Possess
## 1276
## Liquor (Describe Offense)
## 1261
## All Other Offenses
## 1128
## Damage Property - Business
## 1021
## Burglary - Forced Entry-Nonresidential
## 934
## Recovered Property - Other
## 831
## Fraud - Confidence Game
## 802
## Public Peace - Harassing Communication
## 780
## Obstruct Govt - Violation Of A Court Order
## 745
## Drugs - Marijuana - Sell
## 732
## Rape - Strong-Arm
## 662
## Forgery Of Checks
## 654
## Assault - Aggravated - Non-Family-Other Weapon
## 577
## Robbery - Street-Strong-Arm
## 574
## Suicide - Attempt
## 573
## Drugs - Opium Or Derivative - Possess
## 564
## Obstruct Police - Resisting Officer
## 560
## Unauthorized Use Of Vehicle (Includes Joy Riding)
## 544
## Weapon - Possession
## 517
## Weapon - Concealed
## 495
## Drugs - Cocaine - Sell
## 457
## Fraud - Swindle
## 448
## Larceny - From Malls
## 438
## Assault - Aggravated - Family-Other Weapon
## 422
## Driving Under The Influence Drugs
## 421
## Drugs - Heroin - Possess
## 401
## Larceny - Pocket Picking
## 388
## Damage Property - Public
## 377
## Assault - Aggravated - Other
## 343
## Liquor - Sell
## 337
## Drugs - Marijuana (Describe Offense)
## 332
## Sex Assault - Fondling
## 331
## Overdose
## 326
## Larceny - From Yards
## 316
## Drugs - Hallucinogen - Possess
## 313
## Mental Transport - Only
## 311
## Drugs - Amphetamine - Possess
## 296
## Assault - Aggravated - Family-Strong-Arm
## 290
## Family Offense - Neglect Child (Includes Nonsuppor
## 285
## Burglary (Describe Offense)
## 284
## Burglary - No Forced Entry-Nonresidential
## 277
## Obstruct Police (Describe Offense)
## 277
## Fraud - Insufficient Funds Check
## 259
## Larceny - Postal
## 254
## Sex Offense - Fondling
## 246
## Obstruct Police - Obstruct Criminal Invest
## 245
## Littering/Trash Dumping
## 242
## Fugitive From Md Jurisdiction
## 235
## Assault - Intimidation (Includes Stalking)
## 233
## Larceny - Purse Snatching - No Force
## 233
## Drugs - Synthetic Narcotic - Possess
## 229
## Fugitive From Justice (Out Of State)
## 229
## Robbery - Street-Gun
## 228
## Forgery (Describe Offense)
## 217
## Robbery - Strong Arm
## 216
## Sex Offense - Indecent Exposure
## 215
## Sex Offense - Against Child-Fondling
## 203
## Suicide - Completed
## 201
## Assault - Aggravated - Gun
## 189
## Counterfeiting - Pass Counterfeited
## 183
## Embezzle (Describe Offense)
## 182
## Fraud - Failure To Pay
## 178
## Sex Offense - Indecent Exposure To Adult
## 159
## Sex Assault - With An Object
## 158
## Traffic Offenses (Describe Offense)
## 153
## Drugs - Barbiturate - Possess
## 152
## Stolen Vehicle (Describe Offense)
## 152
## Public Peace (Describe Offense)
## 144
## Juvenile - Other
## 141
## Fire (Not Arson)
## 140
## Robbery - Street-Other Weapon
## 135
## Drugs - Dangerous (Describe Offense)
## 133
## Fraud By Wire
## 133
## Forgery Of Other
## 127
## Robbery - Business-Gun
## 126
## Fraud And Abuse - Computer
## 120
## Comm Sex Off - Prostitution
## 119
## Robbery - Gun
## 114
## Assault - Aggravated - Non-Family-Strong-Arm
## 112
## Robbery - Knife
## 112
## Sex Assault - Sodomy-Girl-Strong-Arm
## 109
## Sex Offense - Failure To Register As A Sex Offende
## 109
## Forgery - Pass Forged
## 100
## Drugs - Opium Or Derivative - Sell
## 98
## Weapon - Firing
## 94
## Robbery (Describe Offense)
## 90
## Auto Theft - Possess Stolen Vehicle
## 88
## Assault - Aggravated - Non-Family-Gun
## 86
## Counterfeiting
## 86
## Drugs - Cocaine (Describe Offense)
## 86
## Robbery - Residential-Strong-Arm
## 84
## Family Offense - Cruelty Toward Child
## 82
## Weapon Offense (Describe Offense)
## 82
## Drugs - Heroin - (Describe Offense)
## 80
## Drugs - Heroin - Sell
## 78
## Robbery - Business-Strong-Arm
## 78
## Arson - Burning Of - (Identify Object)
## 75
## Robbery - Domestic
## 75
## Drunkenness
## 72
## Obscene Communication
## 70
## Robbery - Residential-Gun
## 70
## Sex Assault - Sodomy-Woman-Strong-Arm
## 68
## Counterfeiting (Describe Offense)
## 67
## Public Order Crimes
## 67
## Obstruct Police - Making False Report
## 65
## Drugs - Opium Or Derivative (Describe Offense)
## 62
## Threat - Terroristic - State Offenses
## 60
## Weapon - Threat To Bomb
## 59
## Robbery - Business-Other Weapon
## 58
## Sex Offense - Peeping Tom
## 58
## Drugs - Amphetamine - Sell
## 57
## Embezzle - Business Prop
## 57
## Counterfeiting - Poss Counterfeited
## 56
## Extortion (Describe Offense)
## 56
## Sex Assault - Carnal Abuse
## 53
## Sex Offense - Indecent Exposure To Minor
## 49
## Obstruct Govt - Obstructing Justice
## 48
## Property Crimes
## 48
## Extort - Threat Injure Person
## 46
## Family Offense (Describe Offense)
## 44
## Fraud - False Statement
## 42
## Sex Assault - Sodomy-Boy-Strong-Arm
## 42
## Extort - Threat Injure Reputation
## 41
## Robbery - Carjacking - Armed
## 41
## Comm Sex Off - Procure Prostitute - Adult
## 40
## Assault - Aggravated - Pol Off-Other Weapon
## 39
## Drugs - Synthetic Narcotic - Sell
## 39
## Family Offense - Neglect Family
## 39
## Obscene Material
## 39
## Stolen Property - Possess
## 39
## Arson - Residential
## 36
## Assault - Aggravated - Family-Gun
## 36
## Larceny - From Banking-Type Inst
## 34
## Arson (Describe Offense)
## 32
## Drugs - Hallucinogen - Sell
## 32
## Hit And Run
## 32
## Robbery - Other Weapon
## 32
## Robbery - Residential-Other Weapon
## 32
## Obscene Material - Distribute
## 31
## Obstruct Govt - Obstructing Court Order
## 31
## Larceny - From Coin Machine
## 30
## Human Trafficking - Commercial Sex Acts
## 29
## Obscene Material - Possess
## 29
## Robbery - Banking-Type Inst
## 29
## Drugs - Hallucinogen (Describe Offense)
## 28
## Loitering/Vagrancy
## 28
## Animal Bite
## 27
## Drugs - Hallucinogen - Distrib
## 26
## Burglary Tools - Possess
## 25
## Forgery - Possess Forged
## 25
## Sex Assault - Sodomy-Man-Strong-Arm
## 25
## Arson - Residential-Endangered Life
## 24
## Drugs - Marijuana - Producing
## 24
## Fraud - Mail
## 23
## Carrying Prohibited Weapon (Specify Weapon)
## 22
## Larceny - From Shipment
## 22
## Assault - Aggravated - Pub Off-Other Weapon
## 21
## Embezzle - Banking-Type Inst
## 21
## Sex Offense - Sex Offender Registration Violation
## 21
## Solicitation - Minor For Sexual Acts
## 21
## Fire Code Violation
## 20
## Obstruct Police - Evidence - Destroying
## 19
## Drugs - Synthetic Narcotic - (Describe Offense)
## 18
## Robbery - Carjacking - Strong-Arm
## 18
## Robbery - Forcible Purse Snatching
## 17
## Burglary - Safe-Vault
## 16
## Kidnap Adult
## 16
## Stolen Property (Describe Offense)
## 16
## Drugs - Marijuana - Smuggle
## 15
## Drugs - Barbiturate - Sell
## 14
## Escape From Custody
## 13
## Health - Safety (Describe Offense)
## 13
## Assault - Aggravated - Pol Off-Strong-Arm
## 12
## Attempted Assault Aggravated Pol Off Knife
## 12
## Drugs - Amphetamine (Describe Offense)
## 12
## Homicide - Willful Kill-Gun
## 12
## Obstruct Govt - Failure To Appear
## 12
## Rape With Weapon
## 12
## Homicide (Describe Offense)
## 11
## Liquor - Transport
## 11
## Solicitation - Adult For Sexual Acts
## 10
## Crimes Against Person
## 9
## False Imprisonment - Minor - Nonparental
## 9
## Homicide - Willful Kill-Family
## 9
## Weapon - Explosives - Possessing
## 9
## Home Improvement Violation
## 8
## Homicide - Willful Kill
## 8
## Kidnap Minor
## 8
## Comm Sex Off - Homosexual Prostitution
## 7
## Comm Sex Off (Describe Offense)
## 7
## Stolen Property - Receive
## 7
## Weapon - Threat To Burn
## 7
## Arson - Business-Endangered Life
## 6
## Family Offense - Contributing To The Delinquency O
## 6
## Juvenile - Out Of Control
## 6
## Kidnapping (Describe Offense)
## 6
## Obstruct Govt - Contempt Of Court
## 6
## Assault - Aggravated - Pub Off-Strong-Arm
## 5
## Comm Sex Off - Procure For Prostitute (Pimping)
## 5
## Damage Property - Private-With Explosive
## 5
## Failing To Move On
## 5
## Forgery/Counterfeiting - Possess Tools For
## 5
## Health - Drugs - Health Or Safety
## 5
## Homicide - Willful Kill-Family-Gun
## 5
## Morals - Decency Crimes
## 5
## Obstruct - (Specify Judic, Congr, Legis, Commsn)
## 5
## Obstruct Govt - Probation Violation
## 5
## Rape - Gun
## 5
## Sex Assault - Sodomy-Man-Weapon
## 5
## Smuggling - Contraband Into Prison
## 5
## Abduct - No Ransom Or Assault
## 4
## Arson - Pub-Bldg
## 4
## Arson - Pub-Bldg-Endangered Life
## 4
## Assault - Aggravated - Pol Off-Gun
## 4
## Auto Theft - Theft And Use Vehicle Other Crime
## 4
## Escape
## 4
## Health - Drugs - Misbranded
## 4
## Kidnap Minor - Nonparental
## 4
## Larceny - From Interstate Shipment
## 4
## Larceny - Theft Of Us Government Property
## 4
## Obscene Material - Manufacture
## 4
## Auto Theft - Theft And Sale Vehicle
## 3
## Comm Sex Off - Procure Prostitute - Minor
## 3
## Drugs - Barbiturate (Describe Other)
## 3
## Drugs - Heroin - Smuggle
## 3
## Drugs - Opium Or Derivative - Smuggle
## 3
## Extort - Threat Accuse Person Of Crime
## 3
## Failure To Appear - Non Support
## 3
## Homicide - Justifiable
## 3
## Homicide - Willful Kill-Non-Family-Gun
## 3
## Income Tax (Describe Offense)
## 3
## Weapon - Incendiary Device - Possess
## 3
## Arson - Business
## 2
## Assault - Aggravated - Pub Off-Gun
## 2
## Drugs - Hallucinogen - Mfr
## 2
## Drugs - Synthetic Narcotic - Smuggle
## 2
## Embezzle - Interstate Shipment
## 2
## Escape - Flight To Avoid
## 2
## Extort - Threat Of Informing Of Violence
## 2
## False Citizenship
## 2
## Family Offense - Cruelty Toward Wife
## 2
## Homicide - Willful Kill-Non-Family
## 2
## Homosexual Act With Man
## 2
## Illegal Entry
## 2
## Invasion Privacy (Describe Offense)
## 2
## Money Laundering (Describe Offense)
## 2
## Obscenity (Describe Offense)
## 2
## Obstruct Police - Illegal Arrest
## 2
## Obstruct Police - Witness - Dissuading
## 2
## Receive Stolen Vehicle
## 2
## Stolen Property - Sale Of
## 2
## Weapon - Explosives - Using
## 2
## Weapon - Incendiary Device - Using
## 2
## Auto Theft - Strip Stolen Vehicle
## 1
## Auto Theft - Theft And Strip Vehicle
## 1
## Burglary - Banking-Type Inst
## 1
## Comm Sex Off - Keeping House Ill Fame
## 1
## Comm Sex Off - Transport Female Interstate For Imm
## 1
## Compounding Crime
## 1
## Conservation - Animals (Describe Offense)
## 1
## Damage Property - Business-With Explosive
## 1
## Damage Property - Public-With Explosive
## 1
## Drugs - Cocaine - Smuggle
## 1
## Embezzle - Public Prop
## 1
## Extort - Threat Damage Prop
## 1
## False Imprisonment - Minor - Parental
## 1
## Gambling - Dice Game
## 1
## Homicide - Negligent Manslaughter
## 1
## Homosexual Act With Boy
## 1
## Invasion Privacy - Opening Sealed Communication
## 1
## Invasion Privacy - Wiretap - Failure To Report
## 1
## Kidnap - Minor Parental
## 1
## Kidnap Adult To Sexually Assault
## 1
## Liquor - Manufacture
## 1
## Non Support
## 1
## Obstruct Govt - Misconduct - Judic Officer
## 1
## Obstruct Police - Witness - Deceiving
## 1
## Public Peace - Assembly - Unlawful
## 1
## Sex Assault - Sodomy-Girl-Gun
## 1
## Sex Assault - Sodomy-Girl-Weapon
## 1
## Sex Assault - Sodomy-Woman-Weapon
## 1
## Smuggling - Contraband
## 1
## Stolen Property - Transport Interstate
## 1
## Threat - Federal Protectees
## 1
## Transporting Dangerous Materials
## 1
## Weapon - Altering Identification On
## 1
## Weapon - Selling
## 1
Next I have a look at the frequency of crimes by month. But I don’t like the way the table function displays information so I will use dplyr (mentioned above), which provides nicer output and uses a far more legible syntax. Looking at our results, you can see that July and August are the most popular months for crimes.
# Incidents per month, busiest month first; rows without a month are skipped.
crime %>%
  filter(!is.na(incident_month)) %>%
  group_by(incident_month) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
## # A tibble: 12 x 2
## incident_month count
## <dbl> <int>
## 1 7 15193
## 2 8 15158
## 3 9 14209
## 4 10 12597
## 5 12 12182
## 6 11 11880
## 7 3 11727
## 8 1 11606
## 9 5 11506
## 10 6 11316
## 11 4 11118
## 12 2 11040
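Next I have a look at the frequency of marijuana crimes by month. Note that the marijuana flag only matches "Drugs - Marijuana - Sell", so these counts cover marijuana sales. Looking at our results, you can see that July is the most common month for these crimes.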
# Rows of crime_marijuana per month, busiest month first; rows without a
# month are skipped.
crime_marijuana %>%
  filter(!is.na(incident_month)) %>%
  group_by(incident_month) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
## # A tibble: 12 x 2
## incident_month count
## <dbl> <int>
## 1 7 57
## 2 3 54
## 3 10 52
## 4 1 49
## 5 12 48
## 6 8 40
## 7 2 39
## 8 4 36
## 9 9 35
## 10 6 34
## 11 11 34
## 12 5 31
Next I have a look at the frequency of burglary crimes by month. Looking at our results, you can see that again July and August are the most popular months.
# Burglary incidents per month, busiest month first; rows without a month are
# skipped.
crime_burglary %>%
  filter(!is.na(incident_month)) %>%
  group_by(incident_month) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
## # A tibble: 12 x 2
## incident_month count
## <dbl> <int>
## 1 7 535
## 2 8 519
## 3 10 427
## 4 9 424
## 5 1 411
## 6 12 384
## 7 5 371
## 8 11 371
## 9 6 343
## 10 2 337
## 11 3 310
## 12 4 307
We can see that July, August and September seem to have the most arrests in Montgomery County.
# Stacked bar chart of incident counts per month, coloured by the atnight flag.
ggplot(crime, aes(x = incident_month)) +
  geom_bar(aes(fill = atnight)) +
  labs(title = "Arrests in Montgomery County by Month") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_discrete(name = "month", limits = as.character(1:12)) +
  scale_fill_discrete(name = "Time of Day", labels = c("Daytime", "Nightime"))
## Warning: Removed 27497 rows containing non-finite values (stat_count).
For burglaries, July and August again have the most arrests in Montgomery County.
# Stacked bar chart of burglary counts per month, coloured by the atnight flag.
ggplot(crime_burglary, aes(x = incident_month)) +
  geom_bar(aes(fill = atnight)) +
  labs(title = "Burglary Arrests in Montgomery County by Month") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_discrete(name = "month", limits = as.character(1:12)) +
  scale_fill_discrete(name = "Time of Day", labels = c("Daytime", "Nightime"))
## Warning: Removed 302 rows containing non-finite values (stat_count).
# Same monthly chart, but each bar is scaled to 100% (position = "fill") so it
# shows the day/night share rather than raw counts.
ggplot(crime, aes(x = incident_month)) +
  geom_bar(aes(fill = atnight), position = "fill") +
  labs(title = "Arrests in Montgomery County by Month by Percentage") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_discrete(name = "month", limits = as.character(1:12)) +
  scale_fill_discrete(name = "Time of Day", labels = c("Daytime", "Nightime"))
## Warning: Removed 27497 rows containing non-finite values (stat_count).
# Incident counts per hour of day, coloured by the drugs flag. Rows with a
# missing flag or hour are removed first.
# The former scale_x_discrete(name = "Hour") was dropped: the following
# scale_x_continuous() replaced it outright, so it had no effect beyond a
# "Scale for 'x' is already present" message. The axis title still comes from
# labs(x = ...).
ggplot(
  data = filter(crime, !is.na(drugs), !is.na(incident_hour)),
  aes(x = incident_hour)
) +
  geom_bar(aes(fill = drugs)) +
  labs(title = "Arrests in Montgomery County by Hour", x = "Incident Hour") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_continuous(breaks = 0:23, labels = as.character(0:23))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
# Counts of crime_marijuana rows per hour of day; rows without an hour are
# removed first.
# The axis name "Hour" used to sit on a scale_x_discrete() that was replaced by
# the continuous scale and therefore never shown. It is now set on the scale
# that is actually used, which also removes the "Scale for 'x' is already
# present" message.
ggplot(
  data = subset(crime_marijuana, !is.na(incident_hour)),
  aes(x = incident_hour)
) +
  geom_bar(fill = "blue") +
  labs(title = "Marijuana Arrests in Montgomery County by Hour") +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_continuous(name = "Hour", breaks = 0:23, labels = as.character(0:23))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
#labels=c("0","1","2","3","4","5","6","7","8","9","10","11", "12", "13", "14", "15", "16", "17","18","19","20", "21", "22", "23"))
The first visualization I made shows which agency handled the most calls. MCPD was clearly the busiest, answering more than half of the calls.
# Number of marijuana arrests recorded for each agency.
ggplot(crime_marijuana, aes(x = agency)) +
  geom_bar() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "Marijuana Arrests in Montgomery County")
The second visualization compares arrests in the daytime versus the nighttime. As shown in the graph, daytime arrests are much more frequent than nighttime arrests.
# Base chart: one bar per `atnight` value, skipping rows where the flag is missing.
P <- ggplot(
  data = subset(crime, !is.na(atnight)),
  mapping = aes(x = atnight)
) +
  geom_bar(aes(fill = atnight))

# Decorate and print: relabel FALSE/TRUE as Daytime/Nightime, hide the legend
# (it would only repeat the x axis) and centre the title.
P +
  scale_x_discrete(labels = c("FALSE" = "Daytime", "TRUE" = "Nightime")) +
  labs(
    title = " Montgomery County Crimminal Arrests in Daytime vs Nightime",
    x = "Time of the day"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )
This visualization compares marijuana arrests in the daytime versus the nighttime. As shown in the graph, daytime arrests are slightly more frequent than nighttime arrests.
# Base chart of marijuana arrests: one bar per `atnight` value, with rows
# lacking the flag removed.
P <- ggplot(
  data = subset(crime_marijuana, !is.na(atnight)),
  mapping = aes(x = atnight)
) +
  geom_bar(aes(fill = atnight))

# Print with FALSE/TRUE relabelled as Daytime/Nightime, no legend, centred title.
P +
  scale_x_discrete(labels = c("FALSE" = "Daytime", "TRUE" = "Nightime")) +
  labs(
    title = " Montgomery County Marijuana Arrests in Daytime vs Nightime",
    x = "Time of the day"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )
This visualization compares burglary arrests in the daytime versus the nighttime. As shown in the graph, daytime arrests are slightly more frequent than nighttime arrests. This is quite surprising, since you would usually expect people to burglarize houses late at night.
# Base chart of burglary arrests: one bar per `atnight` value, with rows
# lacking the flag removed.
P <- ggplot(
  data = subset(crime_burglary, !is.na(atnight)),
  mapping = aes(x = atnight)
) +
  geom_bar(aes(fill = atnight))

# Print with FALSE/TRUE relabelled as Daytime/Nightime, no legend, centred title.
P +
  scale_x_discrete(labels = c("FALSE" = "Daytime", "TRUE" = "Nightime")) +
  labs(
    title = "Montgomery County Burglary Arrests in Daytime vs Nightime",
    x = "Time of the day"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )
# Stacked bar chart: one bar per `crime_name1` category, filled by `city`.
# Rows without a `crime_name1` value are excluded; x labels are rotated 60
# degrees and right-aligned.
y <- ggplot(
  data = subset(crime, !is.na(crime_name1)),
  mapping = aes(x = crime_name1, fill = city)
) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Print with title and axis label; na.translate = FALSE drops the NA entry
# from the fill scale.
y +
  scale_fill_discrete(na.translate = FALSE) +
  labs(title = "Crimes commited by City", x = "Crime Type")
# Keep only the first 100 rows of `crime` as a small working sample.
# head() returns at most 100 rows, so a shorter `crime` is returned as-is
# instead of being indexed out of bounds (which `crime[1:100, ]` would do).
crime_sub <- head(crime, 100)
# Bounding box (longitude/latitude edges) of the area to be mapped.
md_bb <- c(
  left   = -77.55,
  bottom = 38.92,
  right  = -76.88,
  top    = 39.36
)

# Download the Stamen base-map tiles covering the bounding box at zoom level 11.
md_stamen <- get_stamenmap(bbox = md_bb, zoom = 11)
## Source : http://tile.stamen.com/terrain/11/582/780.png
## Source : http://tile.stamen.com/terrain/11/583/780.png
## Source : http://tile.stamen.com/terrain/11/584/780.png
## Source : http://tile.stamen.com/terrain/11/585/780.png
## Source : http://tile.stamen.com/terrain/11/586/780.png
## Source : http://tile.stamen.com/terrain/11/582/781.png
## Source : http://tile.stamen.com/terrain/11/583/781.png
## Source : http://tile.stamen.com/terrain/11/584/781.png
## Source : http://tile.stamen.com/terrain/11/585/781.png
## Source : http://tile.stamen.com/terrain/11/586/781.png
## Source : http://tile.stamen.com/terrain/11/582/782.png
## Source : http://tile.stamen.com/terrain/11/583/782.png
## Source : http://tile.stamen.com/terrain/11/584/782.png
## Source : http://tile.stamen.com/terrain/11/585/782.png
## Source : http://tile.stamen.com/terrain/11/586/782.png
## Source : http://tile.stamen.com/terrain/11/582/783.png
## Source : http://tile.stamen.com/terrain/11/583/783.png
## Source : http://tile.stamen.com/terrain/11/584/783.png
## Source : http://tile.stamen.com/terrain/11/585/783.png
## Source : http://tile.stamen.com/terrain/11/586/783.png
# Points from `drugsatnight` plotted on the base map, coloured by `crime_name3`.
# alpha = 0.3 makes the points semi-transparent.
alldrugs <- ggmap(md_stamen) +
  geom_point(
    data = drugsatnight,
    mapping = aes(x = longitude, y = latitude, color = crime_name3),
    alpha = 0.3
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "Heat Map for All Drugs at Nightime")

alldrugs
## Warning: Removed 51 rows containing missing values (geom_point).
# Points from `generalmarijuanaatnight` plotted on the base map, coloured by
# `crime_name3`. alpha = 0.3 makes the points semi-transparent.
marijuana <- ggmap(md_stamen) +
  geom_point(
    data = generalmarijuanaatnight,
    mapping = aes(x = longitude, y = latitude, color = crime_name3),
    alpha = 0.3
  ) +
  labs(title = "Heat Map for All Marijuana Crimes at Nightime") +
  # Centre the title so this map matches the other plots in this section.
  theme(plot.title = element_text(hjust = 0.5))

marijuana
## Warning: Removed 40 rows containing missing values (geom_point).
# Points from `crime_marijuana` plotted on the base map, coloured by `crime_name3`.
marijuana <- ggmap(md_stamen) +
  geom_point(
    data = crime_marijuana,
    mapping = aes(x = longitude, y = latitude, color = crime_name3)
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "Heat Map for Marijuana Crimes")

marijuana
## Warning: Removed 5 rows containing missing values (geom_point).
# Points from `crime_burglarynight` plotted on the base map, coloured by
# `crime_name3`. alpha = 0.5 makes the points semi-transparent.
burglary <- ggmap(md_stamen) +
  geom_point(
    data = crime_burglarynight,
    mapping = aes(x = longitude, y = latitude, color = crime_name3),
    alpha = 0.5
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "Heat Map for Burglary Crimes")

burglary
## Warning: Removed 4 rows containing missing values (geom_point).
#Tableau Visualizations https://public.tableau.com/profile/jeffrey.hou#!/vizhome/Crime_15724130808130/Sheet2